Plotting function to reduce code length + simplify. Then, short function to place MLS weight smoothing kernel into same category to make visually consistent, updates are:

  • 3.0 –> 3.6
  • 4.0 –> 4.8
  • 5.0 –> 6.0
  • 6.0 –> 7.2
  • 7.0 –> 8.4
create_plot <- function(dataset, y_est, type, thresh, y_min, y_max, y_lab, add_title = TRUE) {
  plot <- dataset %>%
    ggplot(aes(x = .data[[type]], y = .data[[y_est]], fill = .data[[type]], color = .data[[type]])) +
    geom_rain(alpha = .5, rain.side = 'l',
              boxplot.args = list(color = "black", outlier.shape = NA),
              boxplot.args.pos = list(
                position = ggpp::position_dodgenudge(x = .1), width = 0.1
              )) +
    facet_grid(~study) +
    ylab(y_lab) +
    ylim(y_min,y_max)+
    theme_classic() 
  
  if (add_title) {
    plot <- plot + ggtitle(paste("Distribution by", type, "category (", thresh, "mask)"))
  }
  
  plot <- plot +
    scale_fill_manual(values = pal) +
    scale_color_manual(values = pal) +
    guides(fill = 'none', color = 'none') +
    theme(text = element_text(family = "Times New Roman"),
          axis.text = element_text(size = 12, angle = 45, hjust = 1),
          axis.title = element_text(size = 12),
          legend.text = element_text(size = 12),
          legend.title = element_text(size = 12),
          plot.title = element_text(size = 16))
}

by_sample_conmod <- function(dataset, y_est, y_min, y_max, y_lab) {
  
  # Define a function to create individual plots
  create_plot <- function(data, study) {
    ggplot(data, aes(x = con, y = .data[[y_est]], fill = con, color = con)) +
      geom_rain(alpha = .5, rain.side = 'l',
                boxplot.args = list(color = "black", outlier.shape = NA),
                boxplot.args.pos = list(
                  position = ggpp::position_dodgenudge(x = .1), width = 0.1
                )) +
      facet_grid(~model) +
      ylab(y_lab) +
      ylim(y_min,y_max)+
      theme_classic() +
      scale_fill_manual(values = pal) +
      scale_color_manual(values = pal) +
      guides(fill = 'none', color = 'none') +
      theme(text = element_text(family = "Times New Roman"),
            axis.text = element_text(size = 12, angle = 45, hjust = 1),
            axis.title = element_text(size = 12),
            legend.text = element_text(size = 12),
            legend.title = element_text(size = 12),
            plot.title = element_text(size = 16)) +
      ggtitle(study) 
  }
  
  # Create plots for each study using map
  plot_list <- purrr::map(c("mls", "ahrb", "abcd"), ~ dataset %>%
                            filter(study == .x) %>%
                            create_plot(.x))
  
  # Combine plots into a single patchwork object
  combined_plots <- wrap_plots(plotlist = plot_list)
  
  # Add patchwork annotations
  combined_plots 
}

update_mls_fwhm <- function(df) {
  df <- df %>%
    mutate(fwhm = case_when(
      fwhm == "fwhm-3.0" ~ "fwhm-3.6",
      fwhm == "fwhm-4.0" ~ "fwhm-4.8",
      fwhm == "fwhm-5.0" ~ "fwhm-6.0",
      fwhm == "fwhm-6.0" ~ "fwhm-7.2",
      fwhm == "fwhm-7.0" ~ "fwhm-8.4",
      TRUE ~ as.character(fwhm)  # if none of the conditions match, keep the original value
    ))
  return(df)
}

create_specr_plot <- function(summary_df, est_label) {
  plot_a = plot_curve(df = summary_df, ci = TRUE, desc = FALSE, legend = FALSE, null = 0)
  plot_b <- plot_choices(df = summary_df, choices = c("fwhm", "motion","contrast","model"), desc = F, null = 0) +
    labs(y = "Variables", x = paste("Ordered Specification Curve \n",est_label, "coefficient"))
  
  cowplot::plot_grid(plot_a, plot_b, ncol = 1, align = "v", axis = 'tblr',
                     labels = c('A', 'B'), rel_heights = c(1, 2),
                     label_fontfamily = "Times", label_size = 12)
}

calculate_summary_stats <- function(data, variable, est_type) {
  data %>%
    select(study, {{variable}}) %>% 
    group_by(study) %>% 
    summarise(
      "est_type" = est_type,
      "median" = median({{variable}}),
      "mean" = mean({{variable}}),
      "sd" = sd({{variable}}),
      "min" = min({{variable}}),
      "max" = max({{variable}}) 
    ) %>% 
  kbl(format = "html", 
      booktabs = TRUE) %>% 
  kable_styling(full_width = FALSE)
  
}

The packages are automatically loaded using pacman. The reported .html was last ran on the system: x86_64-apple-darwin17.0 and R version: R version 4.2.1 (2022-06-23) In the Stage 1 PCI Registered Report we are focused on Individual Continouous (intraclass correlation) and the binary/continuous group similarity (jaccard and spearman). This descriptive file includes the output information for the distribution plots, min/max/mean/median of the estimates and the spec plots for each estimate time. This report is for the Session1 between-run estimates

continuous

We stated:

Aim1: the range and distribution of median ICCs across each study (three) and analytic decision category (four) are plotted across suprathreshold task-positive and subthreshold ICCs using Rainclouds (Allen et al., 2019) and the median and standard deviation is reported in a table.

to visualize the ordered median ICCs across the 360 model permutations for suprathreshold task-positive and subthreshold ICCs, specification curve analyses are used (Simonsohn et al., 2020). Specifically, results across the 360 model permutations are reported using a specification curve to represent the range of estimated effects across the variable permutations. This consists of two panels: Panel A represents the ordered ICC coefficients and the ICC’s associated 95% confidence interval colored based on no significance (gray), negative (red) or positive (blue) significance from the Null (Null here is 0) and Panel B represents the analytic decisions from each of the four categories (see Table 1) that produced the median ICC estimates. In the main text, to compare the highest and lowest ICC’s produced by the model permutations, the 25th percentile and 75th percentile median ICC estimates from the 360 models are reported separately for suprathreshold task-positive and subthreshold activation (the specification curve for all ICC estimates for suprathreshold task-positive and subthreshold activation are provided as supplemental information).

Aim2: the range and distribution of median MSBS and MSWS across each study and analytic decision category are plotted across suprathreshold task-positive and subthreshold ICCs using Rainclouds.

two separate specification curve analyses report the ordered median MSBS and MSWS coefficients in one panel and the analytic decisions that produced the MSBS and MSWS estimates in a second panel separately for suprathreshold task-positive and subthreshold activation

group similarity

We stated:

Aim1: For each study, the coefficients are plotted to reflect the distribution and range of coefficients. Both Jaccard’s and Spearman correlation are reported separately.



1 Load data

1.1 ABCD

1.2 AHRB

1.3 MLS

1.4 combine data

2 Smoothing + T to D

2.1 Inherent Smoothing

smoothing_x = c(1.5,2,2.5,3,3.5)
(smoothing_x*4)
## [1]  6  8 10 12 14
cat((smoothing_x*.5)*4)
## 3 4 5 6 7

reporting the smoothness density across 240 group lvl model residual variance maps for ABCD, AHRB and MLS. Five smoothing kernels were used x voxel-size in 2.4mm for abcd/ahrb, resulting in smoothness for ABCD/AHRB and a weight of .50 was used for MLS 4mm voxel-size resulting in smoothness multiples of rcat((smoothing_x*.5)*4).

SmoothEstimate from FSL was used and Rsel^(1/3) is reported as it is “avg FWHM” based on: https://www.jiscmail.ac.uk/cgi-bin/webadmin?A2=FSL;e792b5da.0803

comb_smooth %>% 
  mutate(FWHM_avg = RESEL^(1/3)) %>% 
  ggplot(aes(x=FWHM_avg, fill = sample, colour = sample)) +
  geom_density(alpha = .5) +
  xlab("Avg FWHM") +
  xlim(0,10)+
  ylim(0,.5)+
  scale_fill_manual(values = pal) +  
  scale_colour_manual(values = pal) +
  theme_minimal()

comb_smooth %>% 
  group_by(sample) %>% 
  summarise(mean = mean(RESEL^(1/3)), sd = sd(RESEL^(1/3))) %>% 
  kbl(format = "html", 
      booktabs = TRUE) %>% 
  kable_styling(full_width = FALSE)
sample mean sd
ABCD 4.544900 1.418274
AHRB 4.185944 1.331808
MLS 3.772849 0.957380

2.2 t-stat to cohen’s d

n_range = seq(5, 200, 1)
result_df <- data.frame(n = numeric(0), 
                        t_stat = numeric(0), cohen_d = numeric(0))
for (n in n_range) {
  set.seed(123)
  n <- n
  population_mean <- 0
  mean <- 5
  sd <- 1
  
  # Generate random samples from a t-distribution
  df <- rnorm(n, mean = mean, sd = sd)
  
  
  # Perform a one-sample t-test
  t_test_result <- t.test(df, mu = population_mean)
  t_stat = t_test_result$statistic 
  cohens_d = t_test_result$statistic / sqrt(n)
  result_df <- rbind(result_df, data.frame(n = n, t_stat = t_stat, cohen_d = cohens_d))
}


result_df %>% 
  ggplot(aes(x = n)) +
  geom_line(aes(y = t_stat, color = "T-statistic"), size = .5) +
  geom_line(aes(y = cohen_d, color = "Cohen's d"), size = .5) +
  labs(caption = "Cohen's d = t-stat / sqrt(n)",
       x = "Sample Size (n)",
       y = "Value") +
  scale_color_manual(values = c("T-statistic" = "black", "Cohen's d" = "red")) +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

3 Plot distributions

Below, running the steps to summarize the different Intraclass correlation (ICC), Mean Squared Between Subject (MSBS), Mean Square Within Subject (MSWS), and jaccard and spearman similarty for the model combinations 360 across samples

3.1 ICC

Plotting overall and for each of [four] categories

3.1.1 Subthreshold Mask

Creating rainclouds via ggrain

subset <- by_sample_conmod(icc_subthresh, y_est = "median_est", y_min =-.1, y_max=.6, y_lab = "Median ICC")
fwhm_rg = create_plot(icc_subthresh, y_est = "median_est", type = "fwhm",  thresh = "sub-threshold", 
                      y_min=-.1, y_max=.6, y_lab = "Median ICC", add_title=FALSE)
motion_rg = create_plot(icc_subthresh, y_est = "median_est", type = "motion", thresh = "sub-threshold", 
                        y_min=-.1, y_max=.6, y_lab = "Median ICC", add_title=FALSE)
modeltype_rg = create_plot(icc_subthresh, y_est = "median_est", type = "model", thresh = "sub-threshold",
                           y_min=-.1, y_max=.6, y_lab = "Median ICC", add_title=FALSE)
contrast_rg = create_plot(icc_subthresh, y_est = "median_est", type = "con", thresh = "sub-threshold", 
                          y_min=-.1, y_max=.6, y_lab = "Median ICC", add_title=FALSE)

cat("Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast for sub-threshold mask")
## Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast for sub-threshold mask
(motion_rg | fwhm_rg) / (modeltype_rg | contrast_rg) + plot_annotation(tag_levels = c("A","B","C","D")) & theme(plot.tag = element_text(size = 30, face = "bold")) 
## Warning: Using the `size` aesthetic in this geom was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` in the `default_aes` field and elsewhere instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

subset

3.1.2 Suprathreshold Mask

subset <- by_sample_conmod(icc_suprathresh, y_est = "median_est", y_min=-.1, y_max=.6, y_lab = "Median ICC")
fwhm_rg = create_plot(icc_suprathresh, y_est = "median_est", type = "fwhm",  thresh = "supra-threshold", 
                      y_min=-.1, y_max=.6, y_lab = "Median ICC", add_title=FALSE)
motion_rg = create_plot(icc_suprathresh, y_est = "median_est", type = "motion", thresh = "supra-threshold", 
                        y_min=-.1, y_max=.6, y_lab = "Median ICC", add_title=FALSE)
modeltype_rg = create_plot(icc_suprathresh, y_est = "median_est", type = "model", thresh = "supra-threshold",
                           y_min=-.1, y_max=.6, y_lab = "Median ICC", add_title=FALSE)
contrast_rg = create_plot(icc_suprathresh, y_est = "median_est", type = "con", thresh = "supra-threshold", 
                          y_min=-.1, y_max=.6, y_lab = "Median ICC", add_title=FALSE)


cat("Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast for supra-threshold mask")
## Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast for supra-threshold mask
(motion_rg | fwhm_rg) / (modeltype_rg | contrast_rg) + plot_annotation(tag_levels = c("A","B","C","D")) & theme(plot.tag = element_text(size = 30, face = "bold")) 

subset

3.1.3 Nacc avg Mask

3.1.3.2 Left

fwhm_rg = create_plot(icc_nacc, y_est = "avg_left", "fwhm","Avg Left NAcc", y_min=-.3, y_max=.6, y_lab = "ICC", add_title=FALSE)
motion_rg = create_plot(icc_nacc, y_est = "avg_left", "motion","Avg Left NAcc", y_min=-.3, y_max=.6, y_lab = "ICC", add_title=FALSE)
modeltype_rg = create_plot(icc_nacc, y_est = "avg_left", "model","Avg Left NAcc",y_min=-.3, y_max=.6, y_lab = "ICC", add_title=FALSE)
contrast_rg = create_plot(icc_nacc, y_est = "avg_left", "con","Avg Left NAcc", y_min=-.3, y_max=.6, y_lab = "ICC", add_title=FALSE)

cat("Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast for Left NAcc")
## Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast for Left NAcc
(motion_rg | fwhm_rg) / (modeltype_rg | contrast_rg) + plot_annotation(tag_levels = c("A","B","C","D")) & theme(plot.tag = element_text(size = 30, face = "bold")) 

3.2 MSBS

Plotting overall and for each of [four] categories

3.2.1 Subthreshold Mask

subset <- by_sample_conmod(bs_subthresh, y_est = "median_est", y_min=0, y_max=7, y_lab = "Median MSBS")
fwhm_rg = create_plot(bs_subthresh, y_est = "median_est", type = "fwhm",  thresh = "sub-threshold", 
                      y_min=0, y_max=7, y_lab = "Median MSBS", add_title=FALSE)
motion_rg = create_plot(bs_subthresh, y_est = "median_est", type = "motion", thresh = "sub-threshold", 
                        y_min=0, y_max=7, y_lab = "Median MSBS", add_title=FALSE)
modeltype_rg = create_plot(bs_subthresh, y_est = "median_est", type = "model", thresh = "sub-threshold",
                           y_min=0, y_max=7, y_lab = "Median MSBS", add_title=FALSE)
contrast_rg = create_plot(bs_subthresh, y_est = "median_est", type = "con", thresh = "sub-threshold", 
                          y_min=0, y_max=7, y_lab = "Median MSBS", add_title=FALSE)

cat("Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast for sub-threshold mask")
## Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast for sub-threshold mask
(motion_rg | fwhm_rg) / (modeltype_rg | contrast_rg) + plot_annotation(tag_levels = c("A","B","C","D")) & theme(plot.tag = element_text(size = 30, face = "bold")) 

subset

3.2.2 Suprathreshold Mask

subset <- by_sample_conmod(bs_suprathresh, y_est = "median_est", y_min=0, y_max=7, y_lab = "Median MSBS")
fwhm_rg = create_plot(bs_suprathresh, y_est = "median_est", type = "fwhm",  thresh = "supra-threshold", 
                      y_min=0, y_max=7, y_lab = "Median MSBS", add_title=FALSE)
motion_rg = create_plot(bs_suprathresh, y_est = "median_est", type = "motion", thresh = "supra-threshold", 
                        y_min=0, y_max=7, y_lab = "Median MSBS", add_title=FALSE)
modeltype_rg = create_plot(bs_suprathresh, y_est = "median_est", type = "model", thresh = "supra-threshold",
                           y_min=0, y_max=7, y_lab = "Median MSBS", add_title=FALSE)
contrast_rg = create_plot(bs_suprathresh, y_est = "median_est", type = "con", thresh = "supra-threshold", 
                          y_min=0, y_max=7, y_lab = "Median MSBS", add_title=FALSE)


cat("Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast for supra-threshold mask")
## Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast for supra-threshold mask
(motion_rg | fwhm_rg) / (modeltype_rg | contrast_rg) + plot_annotation(tag_levels = c("A","B","C","D")) & theme(plot.tag = element_text(size = 30, face = "bold")) 

subset

3.3 MSWS

Plotting overall and for each of [four] categories

3.3.1 Subthreshold Mask

subset <- by_sample_conmod(ws_subthresh, y_est = "median_est", y_min=0, y_max=7, y_lab = "Median MSWS")
fwhm_rg = create_plot(ws_subthresh, y_est = "median_est", type = "fwhm",  thresh = "sub-threshold", 
                      y_min=0, y_max=7, y_lab = "Median MSWS", add_title=FALSE)
motion_rg = create_plot(ws_subthresh, y_est = "median_est", type = "motion", thresh = "sub-threshold", 
                        y_min=0, y_max=7, y_lab = "Median MSWS", add_title=FALSE)
modeltype_rg = create_plot(ws_subthresh, y_est = "median_est", type = "model", thresh = "sub-threshold",
                           y_min=0, y_max=7, y_lab = "Median MSWS", add_title=FALSE)
contrast_rg = create_plot(ws_subthresh, y_est = "median_est", type = "con", thresh = "sub-threshold", 
                          y_min=0, y_max=7, y_lab = "Median MSWS", add_title=FALSE)

cat("Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast for sub-threshold mask")
## Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast for sub-threshold mask
(motion_rg | fwhm_rg) / (modeltype_rg | contrast_rg) + plot_annotation(tag_levels = c("A","B","C","D")) & theme(plot.tag = element_text(size = 30, face = "bold")) 

subset

3.3.2 Suprathreshold Mask

subset <- by_sample_conmod(ws_suprathresh, y_est = "median_est", y_min=0, y_max=7, y_lab = "Median MSWS")
fwhm_rg = create_plot(ws_suprathresh, y_est = "median_est", type = "fwhm",  thresh = "supra-threshold", 
                      y_min=0, y_max=7, y_lab = "Median MSWS", add_title=FALSE)
motion_rg = create_plot(ws_suprathresh, y_est = "median_est", type = "motion", thresh = "supra-threshold", 
                        y_min=0, y_max=7, y_lab = "Median MSWS", add_title=FALSE)
modeltype_rg = create_plot(ws_suprathresh, y_est = "median_est", type = "model", thresh = "supra-threshold",
                           y_min=0, y_max=7, y_lab = "Median MSWS", add_title=FALSE)
contrast_rg = create_plot(ws_suprathresh, y_est = "median_est", type = "con", thresh = "supra-threshold", 
                          y_min=0, y_max=7, y_lab = "Median MSWS", add_title=FALSE)

cat("Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast for supra-threshold mask")
## Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast for supra-threshold mask
(motion_rg | fwhm_rg) / (modeltype_rg | contrast_rg) + plot_annotation(tag_levels = c("A","B","C","D")) & theme(plot.tag = element_text(size = 30, face = "bold")) 

subset

3.4 Similarity

3.4.1 jaccard

subset <- by_sample_conmod(similarity_df, y_est = "jaccard", y_min=0, y_max = 1, y_lab = "Jaccard")

fwhm_rg = create_plot(similarity_df, y_est = "jaccard", type = "fwhm", thresh = "Jaccard", 
                      y_min=0, y_max = 1, y_lab = "Jaccard", add_title=FALSE)
motion_rg = create_plot(similarity_df, y_est = "jaccard", type = "motion", thresh = "Jaccard", 
                        y_min=0, y_max = 1, y_lab = "Jaccard", add_title=FALSE)
modeltype_rg = create_plot(similarity_df, y_est = "jaccard", type = "model", thresh="Jaccrd", 
                           y_min=0, y_max = 1, y_lab = "Jaccard", add_title=FALSE)
contrast_rg = create_plot(similarity_df, y_est = "jaccard", type = "con", thresh = "Jaccard", 
                          y_min=0, y_max = 1, y_lab = "Jaccard", add_title=FALSE)

cat("Jaccard: Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast")
## Jaccard: Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast
(motion_rg | fwhm_rg) / (modeltype_rg | contrast_rg) + plot_annotation(tag_levels = c("A","B","C","D")) & theme(plot.tag = element_text(size = 30, face = "bold")) 

subset

3.4.2 spearman

subset <- by_sample_conmod(similarity_df, y_est = "spearman", y_min=0, y_max = 1, y_lab = "Spearman")

fwhm_rg = create_plot(similarity_df, y_est = "spearman", type = "fwhm", thresh = "Spearman", 
                      y_min=0, y_max = 1, y_lab = "Spearman", add_title=FALSE)
motion_rg = create_plot(similarity_df, y_est = "spearman", type = "motion", thresh = "Spearman", 
                        y_min=0, y_max = 1, y_lab = "Spearman", add_title=FALSE)
modeltype_rg = create_plot(similarity_df, y_est = "spearman", type = "model", thresh="Spearman", 
                           y_min=0, y_max = 1, y_lab = "Spearman", add_title=FALSE)
contrast_rg = create_plot(similarity_df, y_est = "spearman", type = "con", thresh = "Spearman", 
                          y_min=0, y_max = 1, y_lab = "Spearman", add_title=FALSE)

cat("Spearman: Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast")
## Spearman: Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast
(motion_rg | fwhm_rg) / (modeltype_rg | contrast_rg) + plot_annotation(tag_levels = c("A","B","C","D")) & theme(plot.tag = element_text(size = 30, face = "bold")) 

subset

3.5 Group Cohen’s ~ ICC

cohens_sim <- gather(similarity_df, key = "Run", value = "spearman", run1_icc_cohensd:run2_icc_cohensd) %>% 
  mutate(Run = case_when(
    Run == "run1_icc_cohensd" ~ "Run1",
    Run == "run2_icc_cohensd" ~ "Run2",
    TRUE ~ Run
  ))
cat("\tICC, MSBS and MSWS median, miniumum and maximum")
##  ICC, MSBS and MSWS median, miniumum and maximum
calculate_summary_stats(similarity_df, run1_icc_cohensd, "ICC ~ R1 Cohen's D")
study est_type median mean sd min max
abcd ICC ~ R1 Cohen’s D 0.0026053 -0.0196391 0.1424802 -0.4280291 0.2474254
ahrb ICC ~ R1 Cohen’s D 0.1254760 0.0887109 0.2255808 -0.4083736 0.5030614
mls ICC ~ R1 Cohen’s D 0.1099449 0.0802882 0.2146424 -0.3459887 0.4323042
calculate_summary_stats(similarity_df, run2_icc_cohensd, "ICC ~ R2 Cohen's D")
study est_type median mean sd min max
abcd ICC ~ R2 Cohen’s D 0.0351886 0.0002983 0.1545906 -0.4202071 0.2471038
ahrb ICC ~ R2 Cohen’s D 0.1275412 0.1029176 0.2154479 -0.3957997 0.5103372
mls ICC ~ R2 Cohen’s D 0.1224453 0.0767892 0.2349547 -0.3756960 0.4559185
cohens_sim %>% ggplot(aes(x = fwhm, y = spearman, fill = fwhm, color = fwhm)) +
  geom_rain(alpha = .5, rain.side = 'l',
            boxplot.args = list(color = "black", outlier.shape = NA),
            boxplot.args.pos = list(
              position = ggpp::position_dodgenudge(x = .2), width = 0.1
            )) +
  facet_grid(~study + Run) +
  theme_classic() +
  ggtitle("Distribution by FWHM category") +
  scale_fill_manual(values = pal) +
  scale_color_manual(values = pal) +
  guides(fill = 'none', color = 'none') +
  theme(text = element_text(family = "Times New Roman"),
        axis.text = element_text(size = 12, angle = 45, hjust = 1),
        axis.title = element_text(size = 12),
        legend.text = element_text(size = 12),
        legend.title = element_text(size = 12),
        plot.title = element_text(size = 16))

cohens_sim %>% ggplot(aes(x = con, y = spearman, fill = con, color = con)) +
  geom_rain(alpha = .5, rain.side = 'l',
            boxplot.args = list(color = "black", outlier.shape = NA),
            boxplot.args.pos = list(
              position = ggpp::position_dodgenudge(x = .2), width = 0.1
            )) +
  facet_grid(~study + Run) +
  theme_classic() +
  ggtitle("Distribution by Contrast category") +
  scale_fill_manual(values = pal) +
  scale_color_manual(values = pal) +
  guides(fill = 'none', color = 'none') +
  theme(text = element_text(family = "Times New Roman"),
        axis.text = element_text(size = 12, angle = 45, hjust = 1),
        axis.title = element_text(size = 12),
        legend.text = element_text(size = 12),
        legend.title = element_text(size = 12),
        plot.title = element_text(size = 16))

cohens_sim %>% ggplot(aes(x = motion, y = spearman, fill = motion, color = motion)) +
  geom_rain(alpha = .5, rain.side = 'l',
            boxplot.args = list(color = "black", outlier.shape = NA),
            boxplot.args.pos = list(
              position = ggpp::position_dodgenudge(x = .2), width = 0.1
            )) +
  facet_grid(~study + Run) +
  theme_classic() +
  ggtitle("Distribution by Motion category") +
  scale_fill_manual(values = pal) +
  scale_color_manual(values = pal) +
  guides(fill = 'none', color = 'none') +
  theme(text = element_text(family = "Times New Roman"),
        axis.text = element_text(size = 12, angle = 45, hjust = 1),
        axis.title = element_text(size = 12),
        legend.text = element_text(size = 12),
        legend.title = element_text(size = 12),
        plot.title = element_text(size = 16))

cohens_sim %>% ggplot(aes(x = model, y = spearman, fill = model, color = model)) +
  geom_rain(alpha = .5, rain.side = 'l',
            boxplot.args = list(color = "black", outlier.shape = NA),
            boxplot.args.pos = list(
              position = ggpp::position_dodgenudge(x = .2), width = 0.1
            )) +
  facet_grid(~study + Run) +
  theme_classic() +
  ggtitle("Distribution by Model category") +
  scale_fill_manual(values = pal) +
  scale_color_manual(values = pal) +
  guides(fill = 'none', color = 'none') +
  theme(text = element_text(family = "Times New Roman"),
        axis.text = element_text(size = 12, angle = 45, hjust = 1),
        axis.title = element_text(size = 12),
        legend.text = element_text(size = 12),
        legend.title = element_text(size = 12),
        plot.title = element_text(size = 16))

cat("Spearman: Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast")
## Spearman: Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast
(motion_rg | fwhm_rg) / (modeltype_rg | contrast_rg) + plot_annotation(tag_levels = c("A","B","C","D")) & theme(plot.tag = element_text(size = 30, face = "bold")) 

3.6 Model Effieincy, ses-1

subset <- by_sample_conmod(efficiency_df, y_est = "eff_est", y_min = 0, y_max = 15, y_lab = "Efficiency Estimate")
motion_rg = create_plot(efficiency_df, y_est = "eff_est", "motion","Efficiency", 
                        y_min = 0, y_max = 15, y_lab = "Efficiency Estimate", add_title=FALSE)
modeltype_rg = create_plot(efficiency_df, y_est = "eff_est", "model","Efficiency", 
                           y_min = 0, y_max = 15, y_lab = "Efficiency Estimate", add_title=FALSE)
contrast_rg = create_plot(efficiency_df, y_est = "eff_est", "con","Efficiency", 
                          y_min = 0, y_max = 15, y_lab = "Efficiency Estimate",  add_title=FALSE)


cat("Plotting A = Motion; B = Model Parameterization, C = Contrast for Efficiency")
## Plotting A = Motion; B = Model Parameterization, C = Contrast for Efficiency
(motion_rg) / (modeltype_rg + contrast_rg) + plot_annotation(tag_levels = c("A","B","C")) & theme(plot.tag = element_text(size = 30, face = "bold")) 

subset

4 Med/Min/Max Across Models

`

4.1 Subtreshold

cat("Subthreshold mask")
## Subthreshold mask
cat("\tICC, MSBS and MSWS median, miniumum and maximum")
##  ICC, MSBS and MSWS median, miniumum and maximum
calculate_summary_stats(icc_subthresh, median_est, "ICC")
study est_type median mean sd min max
abcd ICC 0.0820 0.1121333 0.1009156 -0.042 0.368
ahrb ICC 0.1330 0.1507958 0.0922088 0.028 0.389
mls ICC 0.1185 0.1458833 0.1038028 0.026 0.470
calculate_summary_stats(bs_subthresh, median_est, "MSBS")
study est_type median mean sd min max
abcd MSBS 0.8455 1.0100042 0.7256228 0.091 4.198
ahrb MSBS 0.4780 0.6059083 0.4545722 0.062 2.614
mls MSBS 1.2325 1.7424750 1.5326855 0.096 6.861
calculate_summary_stats(ws_subthresh, median_est, "MSWS")
study est_type median mean sd min max
abcd MSWS 0.6215 0.7196458 0.4435159 0.086 2.294
ahrb MSWS 0.3200 0.3883250 0.2519881 0.052 1.303
mls MSWS 0.8750 1.0128667 0.7300256 0.082 3.279

4.2 Suprathreshold

cat("Suprathreshold mask")
## Suprathreshold mask
cat("\tICC, MSBS and MSWS median, miniumum and maximum")
##  ICC, MSBS and MSWS median, miniumum and maximum
calculate_summary_stats(icc_suprathresh, median_est, "ICC")
study est_type median mean sd min max
abcd ICC 0.098 0.1423417 0.1331955 -0.064 0.439
ahrb ICC 0.176 0.1991250 0.1281665 -0.001 0.522
mls ICC 0.175 0.2061500 0.1292663 0.041 0.550
calculate_summary_stats(bs_suprathresh, median_est, "MSBS")
study est_type median mean sd min max
abcd MSBS 0.592 0.7068333 0.5169213 0.063 2.954
ahrb MSBS 0.326 0.4213875 0.3217035 0.042 1.777
mls MSBS 0.762 1.1030750 0.9815924 0.055 4.261
calculate_summary_stats(ws_suprathresh, median_est, "MSWS")
study est_type median mean sd min max
abcd MSWS 0.4295 0.4840375 0.2843468 0.059 1.539
ahrb MSWS 0.2145 0.2550333 0.1541074 0.037 0.808
mls MSWS 0.5235 0.6093208 0.4407397 0.048 1.911

4.3 simlarity

cat("Similarity median, minimum and maximum for jaccard and spearman")
## Similarity median, minimum and maximum for jaccard and spearman
calculate_summary_stats(similarity_df, jaccard, "Jaccard")
study est_type median mean sd min max
abcd Jaccard 0.0587355 0.1165113 0.1410101 0.0041144 0.5210155
ahrb Jaccard 0.1799773 0.2119861 0.1530871 0.0089624 0.6420259
mls Jaccard 0.3412880 0.3433006 0.1118254 0.1470588 0.6018436
calculate_summary_stats(similarity_df, spearman, "Spearman")
study est_type median mean sd min max
abcd Spearman 0.6263840 0.6685985 0.1758801 0.3406840 0.9535143
ahrb Spearman 0.7323860 0.6835125 0.2169564 0.2208462 0.9560855
mls Spearman 0.8449316 0.8024856 0.1193737 0.4708645 0.9537408

4.4 NAcc

cat("\tICC Right and Left avg ICC, miniumum and maximum")
##  ICC Right and Left avg ICC, miniumum and maximum
calculate_summary_stats(icc_nacc, avg_right, "Right avg ICC")
study est_type median mean sd min max
abcd Right avg ICC 0.0890 0.0938667 0.0815074 -0.064 0.301
ahrb Right avg ICC 0.0255 0.0414333 0.1061659 -0.251 0.419
mls Right avg ICC 0.1025 0.1074417 0.0998838 -0.072 0.402
calculate_summary_stats(icc_nacc, avg_left, "Left avg ICC")
study est_type median mean sd min max
abcd Left avg ICC 0.106 0.1091000 0.0603692 -0.046 0.277
ahrb Left avg ICC 0.136 0.1067750 0.1235957 -0.233 0.455
mls Left avg ICC 0.167 0.1731958 0.0878092 0.027 0.442

5 Specification Curve

Creating data in a format that is compatible with specr. Needs: estimate (i.e., ICC), std.error, conf.high, conf.low.

5.1 ICC

5.1.1 suprathreshold

5.1.1.1 All models

creating combined panel 1 and panel 2 for all model permutations first.

# first, combine independent model vars into string to create average for each model type
icc_suprathresh$model_type <- paste(icc_suprathresh$fwhm, icc_suprathresh$motion,
                                    icc_suprathresh$con,  icc_suprathresh$model,
                                    sep = "_")

# calculate the avg estimate of ICC across study, standard error and +/- 95% confidence interval. In complete version
df_summ <- icc_suprathresh %>% 
  group_by(model_type) %>% 
  summarise(estimate = mean(median_est), std.error = sd(median_est)/sqrt(length(median_est))) %>% 
  mutate(conf.low = estimate - 1.96 * std.error, conf.high = estimate + 1.96 * std.error) %>% 
  separate(col = model_type, into = c("fwhm","motion","contrast","model"), sep = "_|-", remove = FALSE)

# plot
est_label = "supra-threshold ICC"
create_specr_plot(df_summ, est_label)

5.1.1.2 subset models

Creating model that is subset to visualize reliability for the top (>75th) and bottom (< 25th) quartile

# get 75th/25th qunatiles
top_75q = as.numeric(quantile(df_summ$estimate, .75))
bot_25q = as.numeric(quantile(df_summ$estimate, .25))
df_summ_subset = df_summ %>% 
  filter(estimate < bot_25q | estimate > top_75q)

# subset plots
est_label = "supra-threshold ICC"
create_specr_plot(df_summ_subset, est_label)

5.1.2 subthreshold

5.1.2.1 All models

creating combined panel 1 and panel 2 for all model permutations first.

icc_subthresh$model_type <- paste(icc_subthresh$fwhm, icc_subthresh$motion,
                                  icc_subthresh$con,  icc_subthresh$model,
                                  sep = "_")

df_summ <- icc_subthresh %>% 
  group_by(model_type) %>% 
  summarise(estimate = mean(median_est), std.error = sd(median_est)/sqrt(length(median_est))) %>% 
  mutate(conf.low = estimate - 1.96 * std.error, conf.high = estimate + 1.96 * std.error) %>% 
  separate(col = model_type, into = c("fwhm","motion","contrast","model"), sep = "_|-", remove = FALSE)

# plot
est_label = "sub-threshold ICC"
create_specr_plot(df_summ, est_label)

5.1.2.2 subset models

Creating model that is subset to visualize reliability for the top (>75th) and bottom (< 25th) quartile

# get 75th/25th qunatiles
top_75q = as.numeric(quantile(df_summ$estimate, .75))
bot_25q = as.numeric(quantile(df_summ$estimate, .25))
df_summ_subset = df_summ %>% 
  filter(estimate < bot_25q | estimate > top_75q)

# subset plots
est_label = "sub-threshold ICC"
create_specr_plot(df_summ_subset, est_label)

5.1.3 NAcc Avg

5.1.3.1 Left NAcc

creating combined panel 1 and panel 2 for all model permutations first.

# first, combine independent model vars into string to create average for each model type
icc_nacc$model_type <- paste(icc_nacc$fwhm, icc_nacc$motion,
                             icc_nacc$con,  icc_nacc$model,
                             sep = "_")

# calculate the avg estimate of ICC across study, standard error and +/- 95% confidence interval. In complete version
df_summ <- icc_nacc %>% 
  group_by(model_type) %>% 
  summarise(estimate = mean(avg_left), std.error = sd(avg_left)/sqrt(length(avg_left))) %>% 
  mutate(conf.low = estimate - 1.96 * std.error, conf.high = estimate + 1.96 * std.error) %>% 
  separate(col = model_type, into = c("fwhm","motion","contrast","model"), sep = "_|-", remove = FALSE)

# plot
est_label = "Left NAcc ICC"
create_specr_plot(df_summ, est_label)

estimate variability across samples

# median + variable for top
icc_nacc %>% 
  filter(model_type=='8.4_opt1_SgainBase_CueMod' | model_type=='8.4_opt1_LgainBase_CueMod' | model_type=='8.4_opt1_LgainNeut_CueMod')
##         con motion  model fwhm avg_left avg_right study
## 1 LgainBase   opt1 CueMod  8.4    0.188     0.301  abcd
## 2 LgainNeut   opt1 CueMod  8.4    0.084     0.104  abcd
## 3 SgainBase   opt1 CueMod  8.4    0.105     0.169  abcd
## 4 LgainBase   opt1 CueMod  8.4    0.177     0.236  ahrb
## 5 LgainNeut   opt1 CueMod  8.4    0.136     0.050  ahrb
## 6 SgainBase   opt1 CueMod  8.4    0.455     0.419  ahrb
## 7 LgainBase   opt1 CueMod  8.4    0.406     0.291   mls
## 8 LgainNeut   opt1 CueMod  8.4    0.233     0.167   mls
## 9 SgainBase   opt1 CueMod  8.4    0.419     0.290   mls
##                  model_type
## 1 8.4_opt1_LgainBase_CueMod
## 2 8.4_opt1_LgainNeut_CueMod
## 3 8.4_opt1_SgainBase_CueMod
## 4 8.4_opt1_LgainBase_CueMod
## 5 8.4_opt1_LgainNeut_CueMod
## 6 8.4_opt1_SgainBase_CueMod
## 7 8.4_opt1_LgainBase_CueMod
## 8 8.4_opt1_LgainNeut_CueMod
## 9 8.4_opt1_SgainBase_CueMod

5.1.3.2 Right NAcc

creating combined panel 1 and panel 2 for all model permutations first.

# first, combine independent model vars into string to create average for each model type
icc_nacc$model_type <- paste(icc_nacc$fwhm, icc_nacc$motion,
                             icc_nacc$con,  icc_nacc$model,
                             sep = "_")

# calculate the avg estimate of ICC across study, standard error and +/- 95% confidence interval. In complete version
df_summ <- icc_nacc %>% 
  group_by(model_type) %>% 
  summarise(estimate = mean(avg_right), std.error = sd(avg_right)/sqrt(length(avg_right))) %>% 
  mutate(conf.low = estimate - 1.96 * std.error, conf.high = estimate + 1.96 * std.error) %>% 
  separate(col = model_type, into = c("fwhm","motion","contrast","model"), sep = "_|-", remove = FALSE)

# plot
est_label = "Right NAcc ICC"
create_specr_plot(df_summ, est_label)

5.2 MSBS

5.2.1 suprathreshold

5.2.1.1 All models

creating combined panel 1 and panel 2 for all model permutations first.

bs_suprathresh$model_type <- paste(bs_suprathresh$fwhm, bs_suprathresh$motion,
                                   bs_suprathresh$con,  bs_suprathresh$model,
                                   sep = "_")

df_summ <- bs_suprathresh %>% 
  group_by(model_type) %>% 
  summarise(estimate = mean(median_est), std.error = sd(median_est)/sqrt(length(median_est))) %>% 
  mutate(conf.low = estimate - 1.96 * std.error, conf.high = estimate + 1.96 * std.error) %>% 
  separate(col = model_type, into = c("fwhm","motion","contrast","model"), sep = "_|-", remove = FALSE)

# plot
est_label = "supra-threshold MSBS"
create_specr_plot(df_summ, est_label)

5.2.1.2 subset models

Creating model that is subset to visualize reliability for the top (>75th) and bottom (< 25th) quartile

# get 75th/25th qunatiles
top_75q = as.numeric(quantile(df_summ$estimate, .75))
bot_25q = as.numeric(quantile(df_summ$estimate, .25))
df_summ_subset = df_summ %>% 
  filter(estimate < bot_25q | estimate > top_75q)

# subset plots
est_label = "supra-threshold MSBS"
create_specr_plot(df_summ_subset, est_label)

5.2.2 subthreshold

5.2.2.1 All models

creating combined panel 1 and panel 2 for all model permutations first.

bs_subthresh$model_type <- paste(bs_subthresh$fwhm, bs_subthresh$motion,
                                 bs_subthresh$con,  bs_subthresh$model,
                                 sep = "_")


df_summ <- bs_subthresh %>% 
  group_by(model_type) %>% 
  summarise(estimate = mean(median_est), std.error = sd(median_est)/sqrt(length(median_est))) %>% 
  mutate(conf.low = estimate - 1.96 * std.error, conf.high = estimate + 1.96 * std.error) %>% 
  separate(col = model_type, into = c("fwhm","motion","contrast","model"), sep = "_|-", remove = FALSE)

# plot
est_label = "sub-threshold MSBS"
create_specr_plot(df_summ, est_label)

5.2.2.2 subset models

Creating model that is subset to visualize reliability for the top (>75th) and bottom (< 25th) quartile

# get 75th/25th qunatiles
top_75q = as.numeric(quantile(df_summ$estimate, .75))
bot_25q = as.numeric(quantile(df_summ$estimate, .25))
df_summ_subset = df_summ %>% 
  filter(estimate < bot_25q | estimate > top_75q)

# subset plots
est_label = "suprathreshold MSBS"
create_specr_plot(df_summ_subset, est_label)

5.3 MSWS

5.3.1 suprathreshold

5.3.1.1 All models

creating combined panel 1 and panel 2 for all model permutations first.

ws_suprathresh$model_type <- paste(ws_suprathresh$fwhm, ws_suprathresh$motion,
                                   bs_suprathresh$con,  ws_suprathresh$model,
                                   sep = "_")

df_summ <- ws_suprathresh %>% 
  group_by(model_type) %>% 
  summarise(estimate = mean(median_est), std.error = sd(median_est)/sqrt(length(median_est))) %>% 
  mutate(conf.low = estimate - 1.96 * std.error, conf.high = estimate + 1.96 * std.error) %>% 
  separate(col = model_type, into = c("fwhm","motion","contrast","model"), sep = "_|-", remove = FALSE)

# plot
est_label = "supra-threshold MSWS"
create_specr_plot(df_summ, est_label)

5.3.1.2 subset models

Creating model that is subset to visualize reliability for the top (>75th) and bottom (< 25th) quartile

# get 75th/25th qunatiles
top_75q = as.numeric(quantile(df_summ$estimate, .75))
bot_25q = as.numeric(quantile(df_summ$estimate, .25))
df_summ_subset = df_summ %>% 
  filter(estimate < bot_25q | estimate > top_75q)

# subset plots
est_label = "supra-threshold MSWS"
create_specr_plot(df_summ_subset, est_label)

5.3.2 subthreshold

5.3.2.1 All models

creating combined panel 1 and panel 2 for all model permutations first.

ws_subthresh$model_type <- paste(ws_subthresh$fwhm, ws_subthresh$motion,
                                 ws_subthresh$con,  ws_subthresh$model,
                                 sep = "_")

# calculate the avg estimate of ICC across study, standard error and +/- 95% confidence interval. In complete version
df_summ <- ws_subthresh %>% 
  group_by(model_type) %>% 
  summarise(estimate = mean(median_est), std.error = sd(median_est)/sqrt(length(median_est))) %>% 
  mutate(conf.low = estimate - 1.96 * std.error, conf.high = estimate + 1.96 * std.error) %>% 
  separate(col = model_type, into = c("fwhm","motion","contrast","model"), sep = "_|-", remove = FALSE)

# plot
est_label = "sub-threshold MSWS"
create_specr_plot(df_summ, est_label)

5.3.2.2 subset models

Creating model that is subset to visualize reliability for the top (>75th) and bottom (< 25th) quartile

# get 75th/25th qunatiles
top_75q = as.numeric(quantile(df_summ$estimate, .75))
bot_25q = as.numeric(quantile(df_summ$estimate, .25))
df_summ_subset = df_summ %>% 
  filter(estimate < bot_25q | estimate > top_75q)

# subplot
est_label = "sub-threshold MSWS"
create_specr_plot(df_summ_subset, est_label)

5.4 Similarity

5.4.1 Jaccard

5.4.1.1 All models

creating combined panel 1 and panel 2 for all model permutations first.

similarity_df$model_type <- paste(similarity_df$fwhm, similarity_df$motion,
                                  similarity_df$con, similarity_df$model,
                                  sep = "_")

df_summ <- similarity_df %>% 
  group_by(model_type) %>% 
  summarise(estimate = mean(jaccard), std.error = sd(jaccard)/sqrt(length(jaccard))) %>% 
  mutate(conf.low = estimate - 1.96 * std.error, conf.high = estimate + 1.96 * std.error) %>% 
  separate(col = model_type, into = c("fwhm","motion","contrast","model"), sep = "_|-", remove = FALSE)

# plot
est_label = "Jaccard"
create_specr_plot(df_summ, est_label)

5.4.1.2 subset models

Creating model that is subset to visualize reliability for the top (>75th) and bottom (< 25th) quartile

# get 75th/25th qunatiles
top_75q = as.numeric(quantile(df_summ$estimate, .75))
bot_25q = as.numeric(quantile(df_summ$estimate, .25))
df_summ_subset = df_summ %>% 
  filter(estimate < bot_25q | estimate > top_75q)

# subset plots
est_label = "Jaccard"
create_specr_plot(df_summ_subset, est_label)

5.4.2 Spearman

5.4.2.1 All models

creating combined panel 1 and panel 2 for all model permutations first.

similarity_df$model_type <- paste(similarity_df$fwhm, similarity_df$motion,
                                  similarity_df$con,  similarity_df$model, 
                                  sep = "_")

# calculate the avg estimate of ICC across study, standard error and +/- 95% confidence interval. In complete version
df_summ <- similarity_df %>% 
  group_by(model_type) %>% 
  summarise(estimate = mean(spearman), std.error = sd(spearman)/sqrt(length(spearman))) %>% 
  mutate(conf.low = estimate - 1.96 * std.error, conf.high = estimate + 1.96 * std.error) %>% 
  separate(col = model_type, into = c("fwhm","motion","contrast","model"), sep = "_|-", remove = FALSE)

# plot
est_label = "Spearman"
create_specr_plot(df_summ, est_label)

5.4.2.2 subset models

Creating model that is subset to visualize reliability for the top (>75th) and bottom (< 25th) quartile

# get 75th/25th qunatiles
top_75q = as.numeric(quantile(df_summ$estimate, .75))
bot_25q = as.numeric(quantile(df_summ$estimate, .25))
df_summ_subset = df_summ %>% 
  filter(estimate < bot_25q | estimate > top_75q)

# subset plots
est_label = "Spearman"
create_specr_plot(df_summ_subset, est_label)

6 ICC parameter subsampling

We state:

Aim 3: evaluates the sample size at which the ICC stabilizes. The chosen pipeline will be based on the highest median ICC across the studies for the suprathreshold task-positive mask from Aim 1a that will be rerun for the ABCD sample. Based on this pipeline, the first-level analysis steps are repeated for N = 525 from the N = 2000 subsample for only the ABCD data. Then, voxelwise_icc within the brain_icc.py script is used to derive estimates of the median ICC, MSBS and MSWS for the between runs (e.g., measurement occasions) reliability across randomly sampled subjects for 25 to 525 subjects in intervals of 50. Similar to the methods in Liu et al. (2023), 100 iterations are performed at each N (with replacement) and the median ICC, the associated MSBS and MSWS estimates are retained from voxelwise_icc. The average and standard error across the 100 iterations is plotted for each interval of N with the y-axis representing the ICC and x-axis representing N. The plotted values will be used to infer change and stability in the estimated median ICCs and variance components across the sample size. If stability is not achieved by N = 500, the sample is extended to N = 1,000 and the analyses are repeated.

icc_subsamp_lg <- abcd_icc_subsamp %>%
  gather(key, value, starts_with(c("icc_", "msbtwn_", "mswthn_"))) %>%
  separate(key, into = c("variable", "mask"), sep = "_") %>%
  spread(variable, value)
n_range = seq(25,525,50)

6.1 ICC

icc_summary <- icc_subsamp_lg %>%
  group_by(mask,subsample_n) %>%
  summarize(mean_icc = mean(icc),
            lower_ci = quantile(icc, 0.025),
            upper_ci = quantile(icc, 0.975))
## `summarise()` has grouped output by 'mask'. You can override using the
## `.groups` argument.
ggplot() +
  # add individual lines of ICC fo subsample_n + add mean
  geom_path(data = icc_subsamp_lg, aes(x = subsample_n, y = icc, group = as.factor(seed)),
            alpha = 0.1) +  # Use geom_path to connect individual points
  geom_line(data = icc_summary, aes(x = subsample_n, y = mean_icc), alpha = .5) + 
  # create ribbon + 95CI upper/lower
  geom_ribbon(data = icc_summary, aes(x = subsample_n, ymin = lower_ci, ymax = upper_ci), 
              alpha = 0.1) +  
  geom_line(data = icc_summary, aes(x = subsample_n, y = lower_ci), linetype = "dashed", color = "red") +
  geom_line(data = icc_summary, aes(x = subsample_n, y = upper_ci), linetype = "dashed", color = "red") +  
  labs(title = "Individual points, Mean & +/-95% CI of ICC point estimate across 100 bootstraps at each N", 
       subtitle = paste0("N=25 (Max:", 
                        round(max(icc_subsamp_lg %>% filter(subsample_n == 25) %>% pull(icc)), 2),
                        " Min:", 
                        round(min(icc_subsamp_lg %>% filter(subsample_n == 25) %>% pull(icc)), 2),
                        ") N=525 (Max:", 
                        round(max(icc_subsamp_lg %>% filter(subsample_n == 525) %>% pull(icc)), 2),
                        " Min:", 
                        round(min(icc_subsamp_lg %>% filter(subsample_n == 525) %>% pull(icc)), 2)
                        ,")"),
       caption = paste("N range:",
                       paste(n_range, collapse = ", "),
                       "\n100 random iterations per N"),
       x = "", y = "Median ICC") +
  ylim(0, 0.75) +
  facet_wrap(~ mask) +
  theme_minimal()

icc_supra <- icc_subsamp_lg %>%
  filter(mask=="supra") %>% 
  group_by(subsample_n) %>%
  summarize(mean_icc = mean(icc),
            lower_ci = quantile(icc, 0.025),
            upper_ci = quantile(icc, 0.975))

icc_supra_lg = icc_subsamp_lg %>% 
  filter(mask=="supra")

ggplot() +
  # add individual lines of ICC fo subsample_n + add mean
  geom_path(data = icc_supra_lg, aes(x = subsample_n, y = icc, group = as.factor(seed)),
            alpha = 0.1) +  # Use geom_path to connect individual points
  geom_line(data = icc_supra, aes(x = subsample_n, y = mean_icc), alpha = .5) + 
  # create ribbon + 95CI upper/lower
  geom_ribbon(data = icc_supra, aes(x = subsample_n, ymin = lower_ci, ymax = upper_ci), 
              alpha = 0.1) +  
  geom_line(data = icc_supra, aes(x = subsample_n, y = lower_ci), linetype = "dashed", color = "red") +
  geom_line(data = icc_supra, aes(x = subsample_n, y = upper_ci), linetype = "dashed", color = "red") +  
  labs(title = "Individual points, Mean & +/-95% CI of ICC point estimate across 100 bootstraps at each N", 
       subtitle = paste0("N=25 (Max:", 
                        round(max(icc_supra_lg %>% filter(subsample_n == 25) %>% pull(icc)), 2),
                        " Min:", 
                        round(min(icc_supra_lg %>% filter(subsample_n == 25) %>% pull(icc)), 2),
                        ") N=525 (Max:", 
                        round(max(icc_supra_lg %>% filter(subsample_n == 525) %>% pull(icc)), 2),
                        " Min:", 
                        round(min(icc_supra_lg %>% filter(subsample_n == 525) %>% pull(icc)), 2)
                        ,")"),
       caption = paste("N range:",
                       paste(n_range, collapse = ", "),
                       "\n100 random iterations per N"),
       x = "", y = "Median ICC") +
  ylim(0, 0.75) +
  theme_minimal()

6.2 MSBW

msbs_summary <- icc_subsamp_lg %>%
  group_by(mask,subsample_n) %>%
  summarize(mean_msbthn = mean(msbtwn),
            lower_ci = quantile(msbtwn, 0.025),
            upper_ci = quantile(msbtwn, 0.975))
## `summarise()` has grouped output by 'mask'. You can override using the
## `.groups` argument.
ggplot() +
  # add individual lines of ICC fo subsample_n + add mean
  geom_path(data = icc_subsamp_lg, aes(x = subsample_n, y = msbtwn, group = as.factor(seed)),
            alpha = 0.1) +  # Use geom_path to connect individual points
  geom_line(data = msbs_summary, aes(x = subsample_n, y = mean_msbthn), alpha = .5) + 
  # create ribbon + 95CI upper/lower
  geom_ribbon(data = msbs_summary, aes(x = subsample_n, ymin = lower_ci, ymax = upper_ci), 
              alpha = 0.1) +  
  geom_line(data = msbs_summary, aes(x = subsample_n, y = lower_ci), linetype = "dashed", color = "red") +
  geom_line(data = msbs_summary, aes(x = subsample_n, y = upper_ci), linetype = "dashed", color = "red") +  
  labs(title = "Individual points, Mean & +/-95% CI of MSBS point estimate across 100 bootstraps at each N", 
       subtitle = paste0("N=25 (Max:", 
                        round(max(icc_subsamp_lg %>% filter(subsample_n == 25) %>% pull(msbtwn)), 2),
                        " Min:", 
                        round(min(icc_subsamp_lg %>% filter(subsample_n == 25) %>% pull(msbtwn)), 2),
                        ") N=525 (Max:", 
                        round(max(icc_subsamp_lg %>% filter(subsample_n == 525) %>% pull(msbtwn)), 2),
                        " Min:", 
                        round(min(icc_subsamp_lg %>% filter(subsample_n == 525) %>% pull(msbtwn)), 2)
                        ,")"),
       caption = paste("N range:",
                       paste(n_range, collapse = ", "),
                       "\n100 random iterations per N"),
       x = "", y = "Median MSBS") +
  ylim(0, 2) +
  facet_wrap(~ mask) +
  theme_minimal()

6.3 MSWS

msws_summary <- icc_subsamp_lg %>%
  group_by(mask,subsample_n) %>%
  summarize(mean_mswthn = mean(mswthn),
            lower_ci = quantile(mswthn, 0.025),
            upper_ci = quantile(mswthn, 0.975))
## `summarise()` has grouped output by 'mask'. You can override using the
## `.groups` argument.
ggplot() +
  # add individual lines of ICC fo subsample_n + add mean
  geom_path(data = icc_subsamp_lg, aes(x = subsample_n, y = mswthn, group = as.factor(seed)),
            alpha = 0.1) +  # Use geom_path to connect individual points
  geom_line(data = msws_summary, aes(x = subsample_n, y = mean_mswthn), alpha = .5) + 
  # create ribbon + 95CI upper/lower
  geom_ribbon(data = msws_summary, aes(x = subsample_n, ymin = lower_ci, ymax = upper_ci), 
              alpha = 0.1) +  
  geom_line(data = msws_summary, aes(x = subsample_n, y = lower_ci), linetype = "dashed", color = "red") +
  geom_line(data = msws_summary, aes(x = subsample_n, y = upper_ci), linetype = "dashed", color = "red") +  
  labs(title = "Individual points, Mean & +/-95% CI of MSWS point estimate across 100 bootstraps at each N", 
       subtitle = paste0("N=25 (Max:", 
                        round(max(icc_subsamp_lg %>% filter(subsample_n == 25) %>% pull(msbtwn)), 2),
                        " Min:", 
                        round(min(icc_subsamp_lg %>% filter(subsample_n == 25) %>% pull(msbtwn)), 2),
                        ") N=525 (Max:", 
                        round(max(icc_subsamp_lg %>% filter(subsample_n == 525) %>% pull(msbtwn)), 2),
                        " Min:", 
                        round(min(icc_subsamp_lg %>% filter(subsample_n == 525) %>% pull(msbtwn)), 2)
                        ,")"),
       caption = paste("N range:",
                       paste(n_range, collapse = ", "),
                       "\n100 random iterations per N"),
       x = "", y = "Median MSWS") +
  ylim(0, 2) +
  facet_wrap(~ mask) +
  theme_minimal()